************************************************
* Data Setup for SSDR Files 2013-2022          *
* Input: all .sav in "$raw\SSDR"               *
* Output: .dta files in "$raw\SSDR"            *
* Jonas Zhonghan Xie                           *
* January 2024                                 *
************************************************



global ssdr "$raw\SSDR"
global documentation "$raw\DESE Documentation"
global ssdr_save "$saves\SSDR"
*log using "${ssdr}\log", replace
// Transform SPSS data into .dta files

local files: dir "$ssdr" files "*.sav"
foreach f in `files'{
	local filename = substr("`f'",1,strlen("`f'")-4)
	import spss using "${ssdr}\\`f'", clear
	save "${ssdr}\\`filename'.dta",replace
}
//Transform SPSS data into .dta files

local files: dir "$ssdr" files "*.dta"
foreach f in `files'{
	local filename = substr("`f'",1,strlen("`f'")-4)
	use "${ssdr}\\`f'", clear
	keep SCH*
	bys SCH_NAME: gen unique = _n
	keep if unique == 1
	drop unique
	export excel using "${ssdr}\school\\`filename'.xlsx",replace
}

** Prepare the appendix of the DAT code
import excel "${documentation}\ssdr_appendix.xlsx", sheet("Sheet1") firstrow clear
labmask OT_cat,value(OT_cat_desc)
labmask OT_type,value(OT_type_desc)
save "${ssdr}\ssdr_appendix.dta",replace


cap program drop deslabmask
program define deslabmask
	args valvar lblvar
	destring `valvar', replace
	labmask `valvar', values(`lblvar')
	drop `lblvar'
end

capture program drop uniqueexp
program define uniqueexp
	syntax varlist,by(varlist min=1)
	foreach var of varlist `varlist'{
		cap drop obs 
		cap drop obs_unique
		cap drop `var'_sum
		bysort `by' `var': gen obs = _n
		gen obs_unique = obs == 1
		bysort `by': egen `var'_sum = sum(obs_unique)
		drop obs obs_unique
	}
end

cap label drop lbl_gender lbl_physinj lbl_grade lbl_lawenfref
label define lbl_gender 0 "Male" 1 "Female"
label define lbl_physinj 0 "N/A" 1 "N" 2 "Y"
label define lbl_grade 	-1 "PK" 0 "KP/KF/KT" 1 "01" 2 "02" 3 "03" 4 "04" 5 "05" 6 "06" ///
						7 "07" 8 "08" 9 "09" 10 "10" 11 "11" 12 "12" 13 "SP"
label define lbl_lawenfref 	0 "N/A" 1 "Court Referral" /// 
							2 "Ticket/Citation" 3 "Court Referral and Ticket/Citation"
		
capture program drop ssdrprocess
program define ssdrprocess
	** Individual-level variables
	cap rename (GRADESSDR RACESSDR RACE_DESCSSDR) (GRADE RACE RACE_DESC)
	destring SASID,replace
	format SASID %12.0f
	deslabmask RACE RACE_DESC
	deslabmask DIST_CODE DIST_NAME
	//deslabmask SCH_CODE SCH_NAME
	deslabmask PST_CODE PST_DESC
	deslabmask AET_CODE AET_DESC
	rename (DIST_CODE PST_CODE AET_CODE) (DIST PST AE)
	replace GENDER = "0" if GENDER == "M"
	replace GENDER = "1" if GENDER == "F"
	replace GENDER = "." if GENDER == "N"
	destring GENDER,replace
	
	tab RACE
	tab RACE,nol
	
	label values GENDER lbl_gender
	rename GENDER FEMALE
	replace GRADE="0" if GRADE=="KP"|GRADE=="KF"|GRADE=="KT";
	replace GRADE="-1" if GRADE=="PK";
	replace GRADE ="13" if GRADE=="SP";
	
	destring GRADE,replace
	label values GRADE lbl_grade

	** Incident variables
	destring INCIDENT_ID,replace
	format INCIDENT_ID %10.0f
	deslabmask DAT_CODE DAT_DESC
	rename DAT_CODE DAT

	/* VARIABLES TEMPORARILY NOT PROCESSED
	gen ARREST = 1 if SD_ARREST == "Y"
	replace ARREST = 0 if ARREST ==.
	label define lbl_arrest 1 "Y" 0 "N"
	label values ARREST lbl_arrest 

	gen LAWENFREF = SD_LAW_ENFORCEMENT
	destring LAWENFREF,replace
	
	label values LAWENFREF lbl_lawenfref
	drop SD_ARREST SD_LAW_ENFORCEMENT

	rename START_DATE SD
	rename DATE_RETURNED RD
	
	
	rename PHYSINJY PHYSINJ
	replace PHYSINJ = "2" if PHYSINJ == "Y"
	replace PHYSINJ = "1" if PHYSINJ == "N"
	replace PHYSINJ = "0" if PHYSINJ == "X"
	replace PHYSINJ = "0" if PHYSINJ == "x"
	destring PHYSINJ, replace
	label values PHYSINJ lbl_physinj
	*/

	forvalues x = 1/5{
		cap rename (OFFE`x'_CODE_NCES OFFE`x'_DESC_NCES) (OT`x' OT`x'_DESC)
	}
	
	rename START_DATE SD
	rename DATE_RETURNED RD

	** Create dummies for DAT categories
	tab DAT,gen(DAT_)
	cap gen DAT_6 = .
	cap gen DAT_7 = .
	rename 	(DAT_1 DAT_2 DAT_3 DAT_4 DAT_5 DAT_6 DAT_7) ///
			(DAT_inschsus DAT_outschsus DAT_exp DAT_remimpoff ///
			DAT_remschper DAT_rememr DAT_lawref)


	** Create the total suspension days and explusion days for individuals
	uniqueexp INCIDENT_ID, by(SASID)
	rename INCIDENT_ID_sum incident_sum
	cap destring DAYS_MISSED,replace
	bys SASID: egen daymissed_sum = sum(DAYS_MISSED)
	gen day_suspension = 0
	replace day_suspension = DAYS_MISSED if DAT_inschsus == 1 | DAT_outschsus == 1
	gen day_expulsion = 0
	replace day_expulsion = DAYS_MISSED if DAT_exp == 1
	gen day_others = 0
	replace day_others = DAYS_MISSED if DAT_remimpoff == 1 | DAT_remschper == 1 | ///
										DAT_rememr == 1 | DAT_lawref == 1
	foreach i of varlist day_suspension day_expulsion day_others{
		bys SASID: egen `i'_sum = sum(`i')
		drop `i'
		rename `i'_sum `i'
	}


	rename OT*_DESC OT_DESC*
	sort INCIDENT_ID
	gen RECORD = _n
	reshape long OT OT_DESC, i(SASID RECORD) // Reshape the dataset to see the type of incidents.
	replace OT = "" if OT == "500"  // Drop the Unassigned code
	keep if OT!=""
	merge m:1 OT using "${ssdr}\ssdr_appendix.dta", keepusing(OT_type)
	assert _merge == 3 if _merge != 2 // If assertion false, check OT type documentation
	keep if _merge == 3
	drop _merge
	tab OT_type,gen(OT_)

	foreach i of varlist OT_1-OT_5{
		bys RECORD: egen `i'_code = max(`i')
		replace `i'=`i'_code
		drop `i'
		rename `i'_code `i'
	}

	keep if _j == 1
	drop _j OT_type
	sort RECORD

	*** Number of incident by type

	bys SASID INCIDENT_ID: gen num = _n
	keep if num == 1
	drop num

	foreach i of varlist OT_1-OT_5{
		bys SASID: egen `i'_sum = sum(`i')
	}

	foreach i of varlist DAT_inschsus-DAT_lawref{
		bys SASID: egen `i'_sum = sum(`i')
	}


	*** Ever incident by type
	foreach i of varlist OT_1-OT_5{
		bys SASID: egen `i'_code = max(`i')
		replace `i'=`i'_code
		drop `i'
		rename `i'_code `i'
	}


	foreach i of varlist DAT_inschsus-DAT_lawref{
		bys SASID: egen `i'_code = max(`i')
		replace `i'=`i'_code
		drop `i'
		rename `i'_code `i'
	}

	** Collapse the dataset by getting the first observation of each individual
	bys SASID: gen num = _n
	keep if num == 1
	drop num OT OT_DESC AE DAYS_MISSED INCIDENT_ID DAT RECORD SD RD
	order DIST SCH_CODE SCH_NAME SASID GRADE FEMALE RACE PST incident_sum day* OT_* DAT_*

	rename OT_1 substance
	rename OT_2 property
	rename OT_3 sexlmisconduct
	rename OT_4 violence
	rename OT_5 others

	rename OT_1_sum substance_sum
	rename OT_2_sum property_sum
	rename OT_3_sum sexlmisconduct_sum
	rename OT_4_sum violence_sum
	rename OT_5_sum others_sum

	rename DAT_* *

	rename *,lower

	label variable dist "District"
	cap label variable sch_code "School Code"
	label variable sch_name "School Name"
	label variable sasid "SASID"
	// cap label variable dob "Date of Birth"
	label variable grade "Grade"
	label variable female "Female=1"
	label variable race "Race"
	label variable pst "Program Status"
	label variable incident_sum "# Incidents"
	label variable daymissed_sum "# Total Missed Days"
	label variable day_suspension "# Total Suspension Days"
	label variable day_expulsion "# Total Expulsion Days"
	label variable day_others "# Other Disciplinary Days"
	label variable inschsus "Had once In-School Suspension"
	label variable outschsus "Had once Out-of-School Suspension"
	label variable exp "Had once Explusion"
	label variable remimpoff "Had once Removed by Impaired Hearing Officer"
	label variable remschper "Had once Removed by School Personnel"
	label variable rememr "Had once Emergency Removal"
	label variable lawref "Had once Law Referral"
	label variable inschsus_sum "# In-School Suspension Incidents"
	label variable outschsus_sum "# Out-of-School Suspension Incidents"
	label variable exp_sum "# Explusion Incidents"
	label variable remimpoff_sum "# Removed by Impaired Hearing Officer Incidents"
	label variable remschper_sum "# Removed by School Personnel Incidents"
	label variable rememr_sum "# Emergency Removal Incidents"
	label variable lawref_sum "# Law Referral Incidents"
	label variable substance_sum "# Substances incidents"
	label variable property_sum "# Property incidents"
	label variable sexlmisconduct_sum "# Sexual Misconduct incidents"
	label variable violence_sum "# Violence incidents"
	label variable others_sum "# Other incidents"
	label variable substance "Had once Substances incidents" 
	label variable property "Had once Property incidents"
	label variable sexlmisconduct "Had once Sexual Misconduct incidents" 
	label variable violence "Had once Violence incidents"
	label variable others "Had once Other incidents"
end

capture program drop otrename
program define otrename
	forvalues i = 1/5{
	replace OFFE`i'_CODE_NCES = "0011" if OFFE`i'_CODE_NCES == "11"
	replace OFFE`i'_CODE_NCES = "0012" if OFFE`i'_CODE_NCES == "12"
	replace OFFE`i'_CODE_NCES = "0013" if OFFE`i'_CODE_NCES == "13"
	replace OFFE`i'_CODE_NCES = "0021" if OFFE`i'_CODE_NCES == "21"
	replace OFFE`i'_CODE_NCES = "0023" if OFFE`i'_CODE_NCES == "23"
	replace OFFE`i'_CODE_NCES = "0029" if OFFE`i'_CODE_NCES == "29"
	replace OFFE`i'_CODE_NCES = "0030" if OFFE`i'_CODE_NCES == "30"
	replace OFFE`i'_CODE_NCES = "0040" if OFFE`i'_CODE_NCES == "40"
	replace OFFE`i'_CODE_NCES = "0050" if OFFE`i'_CODE_NCES == "50"
	replace OFFE`i'_CODE_NCES = "0099" if OFFE`i'_CODE_NCES == "99"
	}
end
***********************************************
* Look into ssdr23

use "${ssdr}//ssdr23.dta",clear
keep SASID  GRADE GENDER RACE* SCH* INCIDENT_ID DIST* OFFE*_CODE_NCES ///
	OFFE*_DESC_NCES DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
ssdrprocess
** Save the Temp Dataset
save "${ssdr_save}\ssdr23.dta",replace

***********************************************
* Look into ssdr22
use "${ssdr}//ssdr22 no name dob.dta",clear
keep SASID GRADE GENDER RACE* SCH* INCIDENT_ID DIST* OFFE*_CODE_NCES ///
	OFFE*_DESC_NCES DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
ssdrprocess
save "${ssdr_save}\ssdr22.dta",replace

***********************************************
* Look into ssdr21
use "${ssdr}//ssdr21.dta",clear
keep SASID GRADE GENDER  RACE* SCH* INCIDENT_ID DIST* OFFE*_CODE_NCES ///
	OFFE*_DESC_NCES DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
ssdrprocess
save "${ssdr_save}\ssdr21.dta",replace

***********************************************
* Look into ssdr20
use "${ssdr}//ssdr20.dta",clear
keep SASID GRADE GENDER  RACE* SCH* INCIDENT_ID DIST* OFFE*_CODE_NCES ///
	OFFE*_DESC_NCES DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
otrename
ssdrprocess
save "${ssdr_save}\ssdr20.dta",replace

***********************************************
* Look into ssdr19
use "${ssdr}//ssdr19.dta",clear
keep SASID GRADE GENDER  RACE* SCH* INCIDENT_ID DIST* OFFE*_CODE_NCES ///
	OFFE*_DESC_NCES DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
otrename
ssdrprocess
save "${ssdr_save}\ssdr19.dta",replace

***********************************************
* Look into ssdr18
use "${ssdr}//ssdr18.dta",clear
rename (OFFE*_CODE OFFE*_DESC) (OT* OT*_DESC)
keep SASID GRADE GENDER  RACE* SCH* INCIDENT_ID DIST* OT* ///
	OT*_DESC DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
ssdrprocess
save "${ssdr_save}\ssdr18.dta",replace

***********************************************
* Look into ssdr17
use "${ssdr}//ssdr17.dta",clear
rename (OFFE*_CODE OFFE*_DESC) (OT* OT*_DESC)
keep SASID GRADE GENDER  RACE* SCH* INCIDENT_ID DIST* OT* ///
	OT*_DESC DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
ssdrprocess
save "${ssdr_save}\ssdr17.dta",replace

***********************************************
* Look into ssdr16
use "${ssdr}//ssdr16.dta",clear
rename (OFFE*_CODE OFFE*_DESC) (OT* OT*_DESC)
keep SASID GRADE GENDER  RACE* SCH* INCIDENT_ID DIST* OT* ///
	OT*_DESC DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
ssdrprocess
save "${ssdr_save}\ssdr16.dta",replace

***********************************************
* Look into ssdr15
use "${ssdr}//ssdr15.dta",clear
rename (OFFE*_CODE OFFE*_DESC) (OT* OT*_DESC)
keep SASID GRADE GENDER  RACE* SCH* INCIDENT_ID DIST* OT* ///
	OT*_DESC DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
ssdrprocess
save "${ssdr_save}\ssdr15.dta",replace

***********************************************
* Look into ssdr14
use "${ssdr}//ssdr14.dta",clear
rename (OFFE# OFFE#_DESC) (OT# OT#_DESC)
keep SASID GRADE GENDER  RACE* SCH* INCIDENT_ID DIST* OT* ///
	OT*_DESC DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
ssdrprocess
save "${ssdr_save}\ssdr14.dta",replace

***********************************************
* Look into ssdr13
use "${ssdr}//ssdr13.dta",clear
rename (OFFE# OFFE#_DESC) (OT# OT#_DESC)
keep SASID GRADE GENDER RACE* SCH* INCIDENT_ID DIST* OT* ///
	OT*_DESC DAT_* START_DATE DATE_RETURNED DAYS_MISSED PST_* AET_* 
ssdrprocess
save "${ssdr_save}\ssdr13.dta",replace

************************************************
* Append the datasets
clear
use "${ssdr_save}\ssdr23.dta",replace
gen year = 2023
forvalues year =13/22{
	append using "${ssdr_save}\ssdr`year'.dta"
	replace year = 2000+`year' if year == .
}
sort year sasid
order year sasid *
isid year sasid // check year and sasid uniquely identify the observations

save "${ssdr_save}\ssdr_2013-2023.dta",replace

log close